Code

# Libraries
library(tidyverse)
library(gmRi)
library(rnaturalearth)
library(scales)
library(sf)
library(gt)
library(patchwork)
library(lmerTest)
library(emmeans)
library(merTools)
library(tidyquant)
library(ggeffects)
library(performance)
library(gtsummary)
conflicted::conflict_prefer("select", "dplyr")
conflicted::conflict_prefer("filter", "dplyr")
conflicted::conflicts_prefer(lmerTest::lmer)

# ggplot theme
theme_set(
  theme_gmri(
    rect = element_rect(fill = "white", color = NA),
    strip.text.y = element_text(angle = 0),
    axis.text.x = element_text(size = 8),
    legend.position = "bottom"))

# Vectors for factor levels
area_levels <- c("GoM", "GB", "SNE", "MAB")
area_levels_long <- c("Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight")

# Degree symbol
deg_c <- "\u00b0C"
Storycrafting - Northeast Shelf Spectra
I think we need to step back a bit and find the main story again, which is about how the size structure of the NES fish community has changed over time, whether that varies by region, and what covariates are associated with those changes.
The August draft you developed really focused on comparing regression models to consider drivers, based on the fish community size spectrum as viewed from the Wigley species set vs. the full species set.
However, I didn’t see figures of those spectra in that draft (maybe they are somewhere else and I’ve lost track). The recent slide deck really focuses on seasonal differences in the size spectra and the influence of temperature. - KMills
Starting from Square Uno
I’d like for us to start by rebuilding the story of how the size structure has changed over time and how that differs by region.
As such, I would like for us to look at plots of the size spectrum slope for (1) the all-species length spectrum and (2) the Wigley species biomass spectrum from annual, seasonal and regional perspectives.
I am thinking this would be two panel plots (all-spp length, Wigley-spp biomass) with 5 subpanels each (NES, GOM, GB, SNE, MAB) that are set up to show lines by seasonal and annual time steps, much like the plot on the right of slide 7 in your most recent slide deck…except with NES added and an annual line added. Keeping the significant trend lines in is helpful. This is the first step, so when you have that ready, please share it with the group. - KMills
Code
# Data used for Wigley estimates
wigley_in <- read_csv(here::here("Data/processed/wigley_species_trawl_data.csv"))
Code
# Load the three datasets
wigley_lenspectra <- read_csv(
  here::here("Data/processed/wigley_species_length_spectra.csv")) %>%
  mutate(survey_area = factor(survey_area, levels = area_levels))

wigley_bmspectra <- read_csv(
  here::here("Data/processed/wigley_species_bodymass_spectra.csv")) %>%
  mutate(survey_area = factor(survey_area, levels = area_levels))

finfish_lenspectra <- read_csv(
  here::here("Data/processed/finfish_length_spectra.csv")) %>%
  mutate(survey_area = factor(survey_area, levels = area_levels))

# Join them together
spectra_results <- bind_rows(
  list(
    "length_spectra" = wigley_lenspectra,
    "bodymass_spectra" = wigley_bmspectra),
  .id = "metric") %>%
  mutate(community = "Wigley Subset") %>%
  bind_rows(
    mutate(
      finfish_lenspectra,
      metric = "length_spectra",
      community = "Finfish Community")) %>%
  mutate(
    survey_area = fct_relevel(survey_area, area_levels),
    metric = fct_rev(metric))

# Left side:
# All-species length spectrum
# color = season vs. annual
# facet_wrap(~survey_area)

# Right side:
# Wigley species biomass spectrum
# color = season vs. annual
# facet_wrap(~survey_area)

# Model significant trends separately
# Year as RE
lspectra_mod_ffish <- lmer(
  formula = b ~ est_year * survey_area * season + (1 | est_year),
  data = finfish_lenspectra)
lspectra_mod_wig <- lmer(
  formula = b ~ est_year * survey_area * season + (1 | est_year),
  data = wigley_lenspectra)
bmspectra_mod_wig <- lmer(
  formula = b ~ est_year * survey_area * season + (1 | est_year),
  data = wigley_bmspectra)

# Function to get the predictions
# Drop effect fits that are non-significant
get_preds_and_trendsignif <- function(mod_x){

  # Model predictions
  modx_preds <- as.data.frame(
    ggpredict(mod_x, terms = ~ est_year * survey_area * season)) %>%
    mutate(
      survey_area = factor(group, levels = area_levels),
      season = factor(facet, levels = c("Spring", "Fall")))

  # Just survey area and year
  modx_emtrend <- emtrends(
    object = mod_x,
    specs = ~ survey_area * season,
    var = "est_year") %>%
    as_tibble() %>%
    mutate(
      zero = 0,
      non_zero = if_else(between(zero, lower.CL, upper.CL), F, T))

  # Preds with signif
  modx_preds %>%
    left_join(select(modx_emtrend, survey_area, season, non_zero))
}

# Get the predictions and flag whether they are significant
trend_predictions <- bind_rows(
  list(
    "length_spectra-Finfish Community" = get_preds_and_trendsignif(lspectra_mod_ffish),
    "length_spectra-Wigley Subset" = get_preds_and_trendsignif(lspectra_mod_wig),
    "bodymass_spectra-Wigley Subset" = get_preds_and_trendsignif(bmspectra_mod_wig)),
  .id = "model_id") %>%
  separate(model_id, into = c("metric", "community"), sep = "-") %>%
  mutate(metric = fct_rev(metric))

# Plot over observed data
# Contrast seasonal differences
# Make the plot
spectra_trends_1g <- ggplot() +
  geom_ribbon(
    data = filter(trend_predictions, non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = spectra_results,
    aes(est_year, b, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(trend_predictions, non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(survey_area ~ metric * community, scales = "free") +
  labs(
    x = "Year",
    y = "Exponent of Size Spectra")

spectra_trends_1g
# Summary tables
bmspectra_mod_wig %>%
  gtsummary::tbl_regression() %>%
  add_global_p(keep = T) %>%
  bold_p(t = 0.05) %>%
  bold_labels() %>%
  italicize_levels() %>%
  as_gt() %>%
  tab_header(title = "Wigley Community - Mass Spectra Trends")
Wigley Community - Mass Spectra Trends

Characteristic                   | Beta | 95% CI^1   | p-value
est_year                         | 0.00 | 0.00, 0.00 | 0.020
survey_area                      |      |            | 0.3
  GoM                            | —    | —          |
  GB                             | -2.8 | -6.2, 0.71 | 0.12
  SNE                            | -2.6 | -6.1, 0.93 | 0.15
  MAB                            | -2.6 | -6.1, 0.93 | 0.15
season                           |      |            | 0.5
  Fall                           | —    | —          |
  Spring                         | 1.2  | -2.3, 4.7  | 0.5
est_year * survey_area           |      |            | 0.4
  est_year * GB                  | 0.00 | 0.00, 0.00 | 0.12
  est_year * SNE                 | 0.00 | 0.00, 0.00 | 0.2
  est_year * MAB                 | 0.00 | 0.00, 0.00 | 0.2
est_year * season                |      |            | 0.5
  est_year * Spring              | 0.00 | 0.00, 0.00 | 0.5
survey_area * season             |      |            | <0.001
  GB * Spring                    | 1.7  | -3.2, 6.6  | 0.5
  SNE * Spring                   | 1.3  | -3.6, 6.2  | 0.6
  MAB * Spring                   | -7.3 | -12, -2.4  | 0.004
est_year * survey_area * season  |      |            | <0.001
  est_year * GB * Spring         | 0.00 | 0.00, 0.00 | 0.5
  est_year * SNE * Spring        | 0.00 | 0.00, 0.00 | 0.7
  est_year * MAB * Spring        | 0.00 | 0.00, 0.01 | 0.003

^1 CI = Confidence Interval
I also think it would be valuable to build out the story of size change further, so I am thinking that plots of average (or median) length (all species) and weight (Wigley species) for the NES and the sub-regions, like those on p. 12 of the attached file above (the very first draft from late 2022), would be useful. - KMills
Code
# Load the median weight/length data
wigley_size_df <- read_csv(here::here("Data/processed/wigley_species_size_summary.csv"))
finfish_size_df <- read_csv(here::here("Data/processed/finfish_species_length_summary.csv"))

# Join them
size_results <- bind_rows(
  list(
    "Finfish Community" = finfish_size_df,
    "Wigley Subset" = wigley_size_df),
  .id = "community")

# Get trends:
len_mod_ffish <- lmer(
  formula = med_len_cm ~ est_year * survey_area * season + (1 | est_year),
  data = finfish_size_df)
len_mod_wig <- lmer(
  formula = med_len_cm ~ est_year * survey_area * season + (1 | est_year),
  data = wigley_size_df)
wt_mod_wig <- lmer(
  formula = med_wt_kg ~ est_year * survey_area * season + (1 | est_year),
  data = drop_na(wigley_size_df, med_wt_kg))

# Get the predictions and flag whether they are significant
size_trend_predictions <- bind_rows(
  list(
    "med_len_cm-Finfish Community" = get_preds_and_trendsignif(len_mod_ffish),
    "med_len_cm-Wigley Subset" = get_preds_and_trendsignif(len_mod_wig),
    "med_wt_kg-Wigley Subset" = get_preds_and_trendsignif(wt_mod_wig)),
  .id = "model_id") %>%
  separate(model_id, into = c("metric", "community"), sep = "-") %>%
  mutate(metric = fct_rev(metric))

# Left side:
# Median length - all species
# color = season vs. annual
# facet_wrap(~survey_area)
size_long <- size_results %>%
  pivot_longer(
    cols = c(med_len_cm, med_wt_kg),
    names_to = "metric",
    values_to = "val") %>%
  mutate(
    survey_area = fct_relevel(survey_area, area_levels),
    season = fct_rev(season))

# Just length for scales
len_plot <- size_long %>%
  filter(metric == "med_len_cm") %>%
  drop_na(val) %>%
  ggplot() +
  geom_ribbon(
    data = filter(size_trend_predictions, metric == "med_len_cm", non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    # data = filter(size_long, metric == "med_len_cm"),
    aes(est_year, val, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(size_trend_predictions, metric == "med_len_cm", non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(survey_area ~ metric * community, scales = "free") +
  theme(strip.text.y = element_blank()) +
  labs(x = "Year", y = "Length (cm)")

# Right:
# Median weight - Wigley species
# color = season vs. annual
# facet_wrap(~survey_area)
wt_plot <- size_long %>%
  filter(metric == "med_wt_kg") %>%
  drop_na(val) %>%
  ggplot() +
  geom_ribbon(
    data = filter(size_trend_predictions, metric == "med_wt_kg", non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    # data = filter(size_long, metric == "med_wt_kg"),
    aes(est_year, val, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(size_trend_predictions, metric == "med_wt_kg", non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(survey_area ~ metric * community, scales = "free") +
  labs(x = "Year", y = "Weight (kg)")

(len_plot | wt_plot) +
  plot_layout(guides = "collect", widths = c(3, 1.5)) &
  theme(legend.position = "bottom")
A couple of things have started to catch my eye as I’ve thought more about the volatility in these numbers.
There seem to be two situations happening on occasion. The first is more common in the GOM and GB: a 5-10cm drop in median length. I suspect these sudden drops in size are driven by surges of new recruits.
The other pattern, which is more common in the MAB but also appears a handful of times in GB, is the reverse situation, where median size surges upward in isolated years. For these situations I think we could likely trace the spikes to exceptionally high catches of larger species like elasmobranchs.
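As a quick check on both hunches, here is a minimal sketch (the 5cm swing threshold is a placeholder of mine) that flags big year-over-year jumps in median length and then pulls the top species by weight in the flagged years, so we can see whether recruits or elasmobranchs are behind them:

Code

# Flag year-over-year swings in median length larger than 5cm
# (hypothetical threshold), then inspect species composition in those years
size_swings <- size_results %>%
  arrange(community, survey_area, season, est_year) %>%
  group_by(community, survey_area, season) %>%
  mutate(len_change = med_len_cm - lag(med_len_cm)) %>%
  ungroup() %>%
  filter(abs(len_change) > 5)

# Top species by weight in the flagged area/season/years
wigley_in %>%
  inner_join(
    distinct(size_swings, survey_area, season, est_year),
    by = c("survey_area", "season", "est_year")) %>%
  group_by(survey_area, season, est_year, comname) %>%
  summarise(total_kg = sum(sum_weight_kg), .groups = "drop") %>%
  group_by(survey_area, season, est_year) %>%
  slice_max(total_kg, n = 5)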
Summary of Trends
Let’s start with these pieces, and we can then discuss doing something with the extremes of the size distribution, such as changes in small vs. large individuals, either with pre-defined size bins as you’ve done in slides 7 and 8, or with something like Lora’s approach of defining upper/lower percentiles of size (a quick sketch of the percentile option follows after this note).
Hope this sounds good as a set of first steps toward shaping the results section. And I hope once we see these, we can iterate forward to next steps pretty quickly.
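For the percentile version of that idea, a hedged sketch: define the tails from the abundance-weighted length distribution (the 10th/90th cutoffs here are placeholders) and track mean length within each tail:

Code

# A minimal sketch of the percentile approach (cutoffs are placeholders):
# define small/large tails from the abundance-weighted length distribution,
# then track mean length within each tail by area, season, and year
tail_cuts <- DescTools::Quantile(
  wigley_in$length_cm,
  weights = wigley_in$numlen_adj,
  probs = c(0.1, 0.9))

tail_sizes <- wigley_in %>%
  mutate(size_tail = case_when(
    length_cm <= tail_cuts[1] ~ "lower 10%",
    length_cm >= tail_cuts[2] ~ "upper 10%",
    TRUE ~ NA_character_)) %>%
  drop_na(size_tail) %>%
  group_by(survey_area, season, est_year, size_tail) %>%
  summarise(
    mean_len_cm = weighted.mean(length_cm, numlen_adj),
    .groups = "drop")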
Spiny Dogfish Sensitivity
How much can one species shift the spectra?
Code
# # Run MAB with and without spiny dogfish
# # Run the year, season, area fits for the filtered data
# no_dogs_bmspectra <- group_binspecies_spectra(
#   ss_input = filter(wigley_in, comname != "spiny dogfish"),
#   grouping_vars = c("est_year", "season", "survey_area"),
#   abundance_vals = "numlen_adj",
#   length_vals = "length_cm",
#   use_weight = TRUE,
#   isd_xmin = 1,
#   global_min = TRUE,
#   isd_xmax = NULL,
#   global_max = FALSE,
#   bin_width = 1,
#   vdiff = 2)

# # Save those
# write_csv(
#   no_dogs_bmspectra,
#   here::here("Data/processed/wigley_species_nodogfish_bmspectra.csv"))
Code
# Read them in, do some plotting
no_dogs_bmspectra <- read_csv(
  here::here("Data/processed/wigley_species_nodogfish_bmspectra.csv")) %>%
  mutate(survey_area = factor(survey_area, levels = area_levels))

# Format a little
no_dogs_bmspectra <- no_dogs_bmspectra %>%
  mutate(est_year = as.numeric(est_year))

# Join them together
dfish_results <- bind_rows(
  list(
    "Wigley Full" = wigley_bmspectra,
    "Wigley w/o Spiny Dogfish" = no_dogs_bmspectra),
  .id = "community") %>%
  mutate(metric = "bodymass_spectra") %>%
  mutate(survey_area = fct_relevel(survey_area, area_levels))

# Get trends for no dogfish
nodfish_mod <- lmer(
  formula = b ~ est_year * survey_area * season + (1 | est_year),
  data = no_dogs_bmspectra)

dogfish_sens_predictions <- bind_rows(
  list(
    "bodymass_spectra-Wigley Full" = get_preds_and_trendsignif(bmspectra_mod_wig),
    "bodymass_spectra-Wigley w/o Spiny Dogfish" = get_preds_and_trendsignif(nodfish_mod)),
  .id = "model_id") %>%
  separate(model_id, into = c("metric", "community"), sep = "-") %>%
  mutate(metric = fct_rev(metric))

# Plot the differences
ggplot() +
  geom_ribbon(
    data = filter(dogfish_sens_predictions, non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = dfish_results,
    aes(est_year, b, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(dogfish_sens_predictions, non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(survey_area ~ metric * community, scales = "free") +
  labs(
    x = "Year",
    y = "Exponent of Size Spectra",
    title = "Shift in Spectra Trend(s) with Exclusion of Spiny Dogfish")
Large Fish Index
When I looked at large/small fish indices, I considered two metrics:
The large fish index (the proportion of total biomass represented by fishes in the top 5-10% of body sizes)
The large species index
We will focus on the former for the plots below. Fish size here is based on length, because lengths are measured for all individuals.
Large fish will be considered anything over 73cm, representing the 95th percentile.
Values are the fraction of total bodymass for a given season that comes from the large fishes.
Code
##### Large Fish Index ####

# Check weighted quantiles of the length distribution
DescTools::Quantile(
  wigley_in$length_cm,
  weights = wigley_in$numlen_adj,
  probs = c(0.1, 0.5, 0.90))
10% 50% 90%
10 21 55
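The 0.95 quantile itself isn’t in that printout, so as a check on the 73cm threshold we can ask for it directly with the same weighted-quantile call:

Code

# Weighted 95th percentile of length, to confirm the 73cm large-fish cutoff
DescTools::Quantile(
  wigley_in$length_cm,
  weights = wigley_in$numlen_adj,
  probs = 0.95)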
Code
wigley_in <- wigley_in %>%
  mutate(large = ifelse(length_cm > 73, T, F))

# Get total bio
total_bio <- wigley_in %>%
  group_by(survey_area, est_year, season) %>%
  summarise(total_bio = sum(sum_weight_kg), .groups = "drop")

# Get large fish bio
lf_bio <- wigley_in %>%
  group_by(survey_area, est_year, season) %>%
  summarise(large_bio = sum(sum_weight_kg * large), .groups = "drop")

# Join and divide
lfi <- left_join(total_bio, lf_bio) %>%
  mutate(LFI = large_bio / total_bio) %>%
  mutate(survey_area = factor(survey_area, levels = area_levels))

# Get trends for the LFI
lfi_mod <- lmer(
  formula = LFI ~ est_year * survey_area * season + (1 | est_year),
  data = lfi)

lfi_predictions <- get_preds_and_trendsignif(lfi_mod) %>%
  mutate(survey_area = factor(survey_area, levels = area_levels))

# Plot the LFI
ggplot() +
  geom_ribbon(
    data = filter(lfi_predictions, non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = lfi,
    aes(est_year, LFI, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(lfi_predictions, non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  scale_y_continuous(limits = c(0, 1)) +
  facet_grid(survey_area ~ ., scales = "free") +
  labs(
    x = "Year",
    y = "Large Fish Index",
    title = "Large Fish Index",
    fill = "Season",
    color = "Season")
The fraction of biomass held in fishes larger than 73cm is lower in the two northern areas, and it is falling there across both seasons.
In SNE it's declining in the spring, and in the MAB it's increasing.
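Those region-by-season statements can be checked against the fitted slopes directly; a small sketch reusing the emtrends() pattern from get_preds_and_trendsignif():

Code

# Region x season slopes for the LFI model, flagging CIs that exclude zero
emtrends(lfi_mod, specs = ~ survey_area * season, var = "est_year") %>%
  as_tibble() %>%
  mutate(
    zero = 0,
    non_zero = if_else(between(zero, lower.CL, upper.CL), F, T)) %>%
  arrange(survey_area, season)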
Below is a table of some papers operating in the same area or around the same topic that are particularly relevant:
Code
shelf_papers <- tribble(
  ~"Author",          ~"Year", ~"Conjecture",
  "Friedland et al.", 2024,    "Body size has declined despite more abundance & biomass. Competition is preventing larger sizes.",
  "Krumsick",         2020,    "Empirical slopes from a trawl survey in the Labrador Sea were steeper than theoretical slopes from nitrogen stable isotope ratios")
Methodology Hangups
I do not have confidence that the slope values we presently have:
Represent the actual distribution of the data
Are representative of some meaningful feature of the community
I’ve changed it about fifty times now, and I am concerned that people are now convinced of a story the data may not actually support.
Thoughts on Min/Max Parameters
Our hypotheses both relate primarily to the abundance of non-age-0 fishes. It might make sense to move the minimum size up to something larger. This would accomplish two things: 1) reduce the noise in median size or spectra slope that results from large recruit cohorts, and 2) help ensure that the size range we estimate spectra over is fully selected by the gear.
Currently, a 1g minimum size likely falls below the sizes the mesh fully selects.
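Rather than committing to one replacement value, a hedged way to explore this is to refit the spectra over a grid of candidate minima and watch where the exponents stabilize. A sketch, assuming group_binspecies_spectra() from Code/R/Processing_Functions.R (sourced later in this doc); the grid values are illustrative:

Code

# Refit body-mass spectra across a grid of candidate minimum sizes
# (grid values are illustrative; 1g is the current choice, 16g is tried below)
xmin_grid <- c(1, 4, 16, 64)

xmin_sensitivity <- map_dfr(xmin_grid, function(xmin_g) {
  group_binspecies_spectra(
    ss_input       = filter(wigley_in, wmin_g >= xmin_g),
    grouping_vars  = c("est_year", "season", "survey_area"),
    abundance_vals = "numlen_adj",
    length_vals    = "length_cm",
    use_weight     = TRUE,
    isd_xmin       = xmin_g,
    global_min     = TRUE,
    isd_xmax       = NULL,
    global_max     = FALSE,
    bin_width      = 1,
    vdiff          = 2) %>%
    mutate(xmin_g = xmin_g)
})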
This section from Krumsick 2024 describes how this has been handled previously: > To account for reduced catchability of small fish, past size-spectrum studies have typically only used size frequency data for size classes larger than the peak size class in observed data (i.e. the descending slope of the observed size-abundance relationship) when estimating the slope of size spectra (e.g. Rice and Gislason 1996, Daan et al. 2005, Krumsick and Fisher 2020).
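A self-contained sketch of that peak-based rule on our data: bin body mass on the log2 scale and take the lower edge of the modal bin as xmin (in practice we would likely want this per area and season):

Code

# Find the modal (peak) log2 body-mass bin and use its lower edge as a
# data-driven xmin, per the peak-truncation rule quoted above
peak_xmin <- wigley_in %>%
  filter(wmin_g > 0) %>%
  mutate(log2_bin = floor(log2(wmin_g))) %>%
  group_by(log2_bin) %>%
  summarise(abundance = sum(numlen_adj), .groups = "drop") %>%
  slice_max(abundance, n = 1) %>%
  mutate(xmin_g = 2^log2_bin) %>%
  pull(xmin_g)

peak_xmin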
The following table is a short collection of how authors who cited Edwards treated this step:
Code
# Table of papers using the MLE method and their parameter choices:
mle_params <- tribble(
  ~"Author",           ~"Year", ~"Data Used", ~"method", ~"xmin", ~"xmax",
  "Wood",              2024, "Hook and line fisheries-dependent reef fishes from fishermen interviews", "log10 bins of 15mm", "141mm", "320mm",
  "Letessier",         2024, "Baited Remote Underwater Video Systems (BRUVS)", "Two scales: normalized spectra w/ log10 bins for latitudinal brackets, MLE method for individual size distribution for each survey day", "not reported", "not reported",
  "Rice and Gislason", 2006, "Trawl survey of North Sea fish + multi-species virtual pop. analysis", "ln(abundance) within a size interval to ln(size interval), says 10cm later", "not stated", "not stated",
  "Daan",              2005, "North Sea fisheries-independent trawl surveys", "5 & 10cm bins, ln(cpue) within bins", "After inspection of the size range conforming to a ln-linear slope", "size classes at the lower (poor retention in the gear) and upper (captures too infrequent for robust estimates) end of the range were excluded",
  "Krumsick",          2020, "Labrador, Canada fisheries-independent trawl", "...followed recommendations provided by Edwards et al. (2017), although the prescribed maximum likelihood estimate approach departed from the empirical data (Fig. S3)... then binned", "64g", "not stated")

mle_params %>% gt()
Author            | Year | Data Used                                                               | method                                                                                                                                 | xmin                                                               | xmax
Wood              | 2024 | Hook and line fisheries-dependent reef fishes from fishermen interviews | log10 bins of 15mm                                                                                                                     | 141mm                                                              | 320mm
Letessier         | 2024 | Baited Remote Underwater Video Systems (BRUVS)                          | Two scales: normalized spectra w/ log10 bins for latitudinal brackets, MLE method for individual size distribution for each survey day | not reported                                                       | not reported
Rice and Gislason | 2006 | Trawl survey of North Sea fish + multi-species virtual pop. analysis    | ln(abundance) within a size interval to ln(size interval), says 10cm later                                                             | not stated                                                         | not stated
Daan              | 2005 | North Sea fisheries-independent trawl surveys                           | 5 & 10cm bins, ln(cpue) within bins                                                                                                    | After inspection of the size range conforming to a ln-linear slope | size classes at the lower (poor retention in the gear) and upper (captures too infrequent for robust estimates) end of the range were excluded
Krumsick          | 2020 | Labrador, Canada fisheries-independent trawl                            | ...followed recommendations provided by Edwards et al. (2017), although the prescribed maximum likelihood estimate approach departed from the empirical data (Fig. S3)... then binned | 64g | not stated
The following plot shows what the distribution looks like for all data (years, seasons, areas) over the range of sizes we allow to contribute to the estimation of seasonal spectra. I've highlighted where a minimum body size of 1g sits on this curve.
Code
# Broad Distribution
# log2 bins for ease-of-access

#' @title Build Log2 Bin Structure Dataframe
#'
#' @description Used to build a dataframe containing equally spaced log2 bins for
#' size spectra analysis. Contains details on the left and right limits, midpoint,
#' bin width, and a text label for the bins. log2 bin number ascends with
#' increasing size for easy plotting.
#'
#' @param log2_min
#' @param log2_max
#' @param log2_increment
#'
#' @return
#' @export
#'
#' @examples
define_log2_bins <- function(log2_min = 0, log2_max = 13, log2_increment = 1){

  # How many bins
  n_bins <- length(seq(log2_max, log2_min + log2_increment, by = -log2_increment))

  # Build equally spaced log2 bin df
  log2_bin_structure <- data.frame(
    "log2_bins" = as.character(seq(n_bins, 1, by = -1)),
    "left_lim" = seq(log2_max - log2_increment, log2_min, by = -log2_increment),
    "right_lim" = seq(log2_max, log2_min + log2_increment, by = -log2_increment)) %>%
    mutate(
      bin_label = str_c(round(2^left_lim, 3), " - ", round(2^right_lim, 3), "g"),
      bin_width = 2^right_lim - 2^left_lim,
      bin_midpoint = (2^right_lim + 2^left_lim) / 2) %>%
    arrange(left_lim)

  return(log2_bin_structure)
}
Code
#' @title Assign Manual log2 Bodymass Bins
#'
#' @description Manually assign log2 bins based on individual length-weight bodymass
#' in increments of 1 on the log2 scale. Returns data with bins assigned based on
#' individual length-weight biomass.
#'
#' Uses maximum weight, and its corresponding bin, as the limit.
#'
#' @param wmin_grams Catch data prepared for mle calculation, use prep_wmin_wmax
#' @param log2_increment Equally spaced increments to use for log2 bin sizes. Default = 1.
#'
#' @return
#' @export
#'
#' @examples
assign_log2_bins <- function(wmin_grams, log2_increment = 1){

  #### 1. Set up bodymass bins

  # Filter missing weights
  size_data <- wmin_grams %>%
    filter(
      wmin_g > 0,
      is.na(wmin_g) == FALSE,
      wmax_g > 0,
      is.na(wmax_g) == FALSE)

  # Get bodymass on log2() scale
  size_data$log2_weight <- log2(size_data$ind_weight_g)

  # Set up the bins - pull min and max weights from data available
  # min_bin <- floor(min(size_data$log2_weight))
  min_bin <- 0
  max_bin <- ceiling(max(size_data$log2_weight))

  # Build a bin key, could be used to clean up the incremental assignment
  # or for apply-style functions
  log2_bin_structure <- define_log2_bins(
    log2_min = min_bin,
    log2_max = max_bin,
    log2_increment = log2_increment)

  # Loop through bins to assign the bin details to the data
  log2_assigned <- log2_bin_structure %>%
    split(.$log2_bins) %>%
    map_dfr(function(log2_bin){

      # Limits and labels
      l_lim <- log2_bin$left_lim
      r_lim <- log2_bin$right_lim
      bin_num <- as.character(log2_bin$log2_bin)

      # Assign the label to the appropriate bodymasses
      size_data %>%
        mutate(
          log2_bins = ifelse(between(log2_weight, l_lim, r_lim), bin_num, NA),
          log2_bins = as.character(log2_bins)) %>%
        drop_na(log2_bins)
    })

  # Join in the size bins
  log2_assigned <- left_join(log2_assigned, log2_bin_structure, by = "log2_bins")

  # Return the data with the bins assigned
  return(log2_assigned)
}
Code
#' @title Calculate Normalized and De-Normalized Abundances
#'
#' @description For binned size spectra estimation we use the stratified abundance
#' divided by the bin widths (normalized size spectra). Another way to present the
#' data is to de-normalize, which takes those values and multiplies them by the
#' mid-point of the log-scale bins.
#'
#' min/max & bin_increments are used to enforce the presence of a size bin in the
#' event that there is no abundance. This is done for comparing across different
#' groups/areas that should conceivably have the same size range sampled.
#'
#' @param log2_assigned size data containing the bin assignments to use
#' @param min_log2_bin Minimum 2^x value for the size spectra being measured (>=)
#' @param max_log2_bin Maximum 2^x value for the size spectra being measured (<)
#' @param bin_increment The bin-width on log scale that separates each bin
#' @param ... Additional grouping factors with which to aggregate besides the size bins themselves
#'
#' @return
#' @export
#'
#' @examples
aggregate_log2_bins <- function(log2_assigned, min_log2_bin = 0, max_log2_bin = 13, bin_increment = 1, ...){

  # Full possible bin structure; fills in any gaps
  log2_bin_structure <- define_log2_bins(
    log2_min = min_log2_bin,
    log2_max = max_log2_bin,
    log2_increment = bin_increment)

  # Capture all the group levels with a cheeky expand()
  if(missing(...) == FALSE){
    log2_bin_structure <- log2_bin_structure %>%
      tidyr::expand(left_lim, distinct(log2_assigned, ...)) %>%
      full_join(log2_bin_structure)
  }

  # Get bin breaks
  log2_breaks <- sort(unique(c(log2_bin_structure$left_lim, log2_bin_structure$right_lim)))

  # Get totals for bodymass and abundances
  log2_aggregates <- log2_assigned %>%
    group_by(left_lim, ...) %>%
    summarise(
      abundance = sum(numlen_adj, na.rm = T),
      weight_g = sum(wmin_g, na.rm = T),
      .groups = "drop")

  # Join back in what the limits and labels are
  # The defined bins and their labels enforce the size limits
  log2_prepped <- left_join(x = log2_bin_structure, y = log2_aggregates)

  #### Fill gaps ####
  # Empty bins are filled with a value of 1 (not 0) so that any size bin
  # intended to be in use is actually used and survives the log scales
  log2_prepped <- log2_prepped %>%
    mutate(
      abundance = ifelse(is.na(abundance), 1, abundance),
      weight_g = ifelse(is.na(weight_g), 1, weight_g))

  #### Normalize abundances using the bin widths
  log2_prepped <- log2_prepped %>%
    mutate(
      normalized_abund = abundance / bin_width,
      normalized_biom = weight_g / bin_width
      # # Remove bins where normalized biomass < 0? No!
      # normalized_abund = ifelse(normalized_abund < 2^0, NA, normalized_abund),
      # norm_strat_abund = ifelse(norm_strat_abund < 2^0, NA, norm_strat_abund)
      )

  # Add de-normalized abundances (abundance * bin midpoint)
  log2_prepped <- log2_prepped %>%
    mutate(
      denorm_abund = normalized_abund * bin_midpoint,
      denorm_biom = normalized_biom * bin_midpoint)

  # Return the aggregations
  return(log2_prepped)
}
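For reference, a minimal usage sketch chaining the three helpers above to produce the normalized distribution that the 1g/16g discussion rests on. It assumes wigley_in carries an ind_weight_g column from the length-weight step (hedged; the upstream column name may differ):

Code

# Chain the helpers: assign log2 bins, aggregate by area, then plot the
# normalized abundance with the 1g and 16g candidate minima marked.
# Assumes wigley_in has ind_weight_g (individual length-weight mass in grams)
log2_binned <- wigley_in %>%
  assign_log2_bins(log2_increment = 1) %>%
  aggregate_log2_bins(
    min_log2_bin = 0, max_log2_bin = 13,
    bin_increment = 1, survey_area)

ggplot(log2_binned, aes(bin_midpoint, normalized_abund)) +
  geom_line() +
  geom_point() +
  geom_vline(xintercept = c(1, 16), linetype = "dashed") +
  scale_x_log10() +
  scale_y_log10() +
  facet_wrap(~survey_area) +
  labs(x = "Body mass (g)", y = "Normalized abundance")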
I picked 16g here by basically eyeballing things, but my point is mostly that I think there is a need for, and an advantage to, shifting the minimum size. We could probably be more scientific and look at the mesh size used (a codend liner of 1-inch stretched diamond mesh), but I think precision there is less important than being closer than we are now.
In all regions it seems like the typical linear slope begins at a size larger than 1g.
There is a similar lack of tuning for the maximum size as well. We presently take the maximum individual size in each area-year-season unit. While this also affects the distribution shape, I am less concerned with it for now.
Is it even Pareto?
It is quite a pain to determine whether the data follow a Pareto distribution in the first place. I'm unsure whether the lines in the plots below faithfully follow how the spectra are estimated, or how to score goodness of fit for a probability density function.
This section from Edwards et al. 2017 alludes to the ongoing debate on the appropriateness of applying size spectra methods to empirical data.
The MLE method avoids binning and regression. Binning in general can be problematic (e.g. if a data set has no body masses <10 g but the lowest bin is defined as 8–16 g), and the choice of bin widths can affect the estimated slope (Vidondo et al. 1997). Regression-based methods are problematic because the intercept and the slope implicitly determine x_max, which can erroneously be greater than some data values (James, Plank & Edwards 2011). They also assume that the errors in the logarithmic counts for each bin have the same variance, which may not be justified. Although regression can be understood in a likelihood context, this is different to explicitly using a likelihood-based method (Edwards et al. 2012).
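One hedged way to put a number on the fit, rather than eyeballing the curves: compare the empirical cumulative distribution against the fitted bounded power-law CDF from sizeSpectra::pPLB() with a KS-style statistic. This treats each fish as sitting at its bin's lower weight, which is an approximation since the data are really bins:

Code

# KS-style distance between the empirical CDF and the fitted bounded
# power law (b from the MLE fit); an approximation because each fish is
# represented by its bin's lower weight rather than an exact mass
gof_plb <- function(weights_g, counts, b, xmin, xmax) {
  ord <- order(weights_g)
  w <- weights_g[ord]
  n <- counts[ord]
  ecdf_vals <- cumsum(n) / sum(n)                      # empirical CDF
  plb_vals  <- sizeSpectra::pPLB(w, b = b, xmin = xmin, xmax = xmax)
  max(abs(ecdf_vals - plb_vals))                       # KS statistic
}

# Example with the GoM Spring 2000 fit prepared below:
# gof_plb(actual_vals_1g$wmin_g, actual_vals_1g$numlen_adj,
#         b = mle_results_1g$b, xmin = 1, xmax = max(actual_vals_1g$wmin_g))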
What if we shifted minimum size to 16g?
I’m just going to go ahead and do it and see:
After some attempts to plot the impacts of moving it around, I am less convinced that it's helping things, and it may even be harming the situation.
In Krumsick et al. 2020, they determined visually that their trawl data did not follow the distribution assumed by the MLE methodology, and they instead turned to normalized spectra using binned methods.
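If we end up in the same spot, here is a sketch of that binned fallback using the log2 helpers defined above: an OLS slope on the log-normalized abundances, per survey area (same ind_weight_g assumption as the earlier usage sketch):

Code

# Binned fallback: OLS slope of log2(normalized abundance) on
# log2(bin midpoint), per survey area
binned_slopes <- wigley_in %>%
  assign_log2_bins(log2_increment = 1) %>%
  aggregate_log2_bins(
    min_log2_bin = 0, max_log2_bin = 13,
    bin_increment = 1, survey_area) %>%
  group_by(survey_area) %>%
  summarise(
    slope = coef(lm(log2(normalized_abund) ~ log2(bin_midpoint)))[2],
    .groups = "drop")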
Code
# Load processing functions
# remotes::install_github("andrew-edwards/sizeSpectra")
library(sizeSpectra)
source(here::here("Code/R/Processing_Functions.R"))

# Run the year, season, area fits for the filtered data
wigley_bmspectra_16 <- group_binspecies_spectra(
  ss_input = filter(wigley_in, wmin_g >= 16),
  grouping_vars = c("est_year", "season", "survey_area"),
  abundance_vals = "numlen_adj",
  length_vals = "length_cm",
  use_weight = TRUE,
  isd_xmin = 16,
  global_min = TRUE,
  isd_xmax = NULL,
  global_max = FALSE,
  bin_width = 1,
  vdiff = 2)

# # Save those
# write_csv(
#   wigley_bmspectra_16,
#   here::here("Data/processed/wigley_species_min16_bodymass_spectra.csv"))
Code
# Read them in, do some plotting
wigley_bmspectra_16 <- read_csv(
  here::here("Data/processed/wigley_species_min16_bodymass_spectra.csv")) %>%
  mutate(survey_area = fct_relevel(survey_area, area_levels))
Code
# Model significant trends
bmspectra_16g_mod_wig <- lmer(
  formula = b ~ est_year * survey_area * season + (1 | est_year),
  data = wigley_bmspectra_16)

# Get the predictions and flag whether they are significant
trend_predictions_16g <- get_preds_and_trendsignif(bmspectra_16g_mod_wig) %>%
  mutate(model_id = "bodymass_spectra-Wigley Subset, 16g min") %>%
  separate(model_id, into = c("metric", "community"), sep = "-") %>%
  mutate(
    metric = fct_rev(metric),
    survey_area = fct_relevel(survey_area, area_levels))

# Plot significant trends over observed data
spectra_trends_16g <- ggplot() +
  geom_ribbon(
    data = filter(trend_predictions_16g, non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = wigley_bmspectra_16,
    aes(est_year, b, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(trend_predictions_16g, non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  scale_y_continuous(limits = c(-2.5, -0.8)) +
  facet_grid(survey_area ~ metric * community) +
  labs(x = "Year", y = "Exponent of Size Spectra")

# Compare to previous 1g version
wigley_trends_1g <- ggplot() +
  geom_ribbon(
    data = filter(trend_predictions, metric == "bodymass_spectra", non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = filter(spectra_results, metric == "bodymass_spectra"),
    aes(est_year, b, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(trend_predictions, metric == "bodymass_spectra", non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  scale_y_continuous(limits = c(-2.5, -0.8)) +
  facet_grid(survey_area ~ metric * community) +
  labs(x = "Year", y = "Exponent of Size Spectra")

(wigley_trends_1g | spectra_trends_16g) +
  plot_layout(guides = "collect") &
  theme(legend.position = "bottom")
Code
# Prepare inputs directly:
yr_op <- c(2000)
reg <- "GoM"
seas <- "Spring"

# Data for the spectra
actual_vals_1g <- wigley_in %>%
  filter(
    est_year %in% yr_op,
    survey_area == reg,
    season == seas,
    wmin_g >= 1)

actual_vals_16g <- wigley_in %>%
  filter(
    est_year %in% yr_op,
    survey_area == reg,
    season == seas,
    wmin_g >= 16)
Sensitivity to minimum size
Code
# Load processing functions
source(here::here("Code/R/Processing_Functions.R"))

# Get the slope over those years
mle_results_1g <- group_binspecies_spectra(
  ss_input = actual_vals_1g,
  grouping_vars = c("est_year", "season", "survey_area"),
  abundance_vals = "numlen_adj",
  length_vals = "length_cm",
  use_weight = FALSE,
  isd_xmin = 1,
  global_min = TRUE,
  isd_xmax = NULL,
  global_max = FALSE,
  bin_width = 1,
  vdiff = 2)

# And again for 16g
mle_results_16g <- group_binspecies_spectra(
  ss_input = actual_vals_16g,
  grouping_vars = c("est_year", "season", "survey_area"),
  abundance_vals = "numlen_adj",
  length_vals = "length_cm",
  use_weight = FALSE,
  isd_xmin = 16,
  global_min = TRUE,
  isd_xmax = NULL,
  global_max = FALSE,
  bin_width = 1,
  vdiff = 2)
Code
#' @title Individual Size Distribution Plot Prep
#'
#' @description Prepares wmin_grams data to be plotted with the MLE
#' size spectrum slope fit.
#'
#' @param biomass_data Data prepped for mle estimation, use prep_wmin_wmax
#' @param min_weight_g Minimum weight cutoff to use to match bounded pareto minimum
#'
#' @return
#' @export
#'
#' @examples
isd_plot_prep <- function(biomass_data = databin_split, min_weight_g = 1){

  # Arrange by lower weight
  biomass_data <- dplyr::arrange(biomass_data, desc(wmin_g)) %>%
    select(wmin_g, wmax_g, numlen_adj)

  # Truncate the data to match the spectra fit
  biomass_data <- biomass_data %>%
    filter(
      wmin_g >= min_weight_g,
      wmin_g != 0,
      is.na(wmin_g) == FALSE,
      wmax_g != 0,
      is.na(wmax_g) == FALSE)

  # Number of fish for the year
  total_abundance <- sum(biomass_data$numlen_adj)

  # Plain vectors, not dplyr:
  wmin.vec <- biomass_data$wmin_g    # Vector of lower weight bin limits
  wmax.vec <- biomass_data$wmax_g    # Vector of upper weight bin limits
  num.vec <- biomass_data$numlen_adj # Vector of corresponding counts for those bins

  # Doing it with purrr and pmap
  biomass_bins <- select(biomass_data, wmin_g, wmax_g) %>%
    distinct(wmin_g, wmax_g)

  # Go row-by-row and get how many of any species fall into each
  # discrete size bin
  out_data <- pmap_dfr(biomass_bins, function(wmin_g, wmax_g){

    # 1. Count times wmin.vec >= individual wmin_g, weighted by counts
    countGTEwmin <- sum((wmin.vec >= wmin_g) * num.vec)

    # 2. Count times wmin.vec >= individual wmax_g, weighted by counts
    lowCount <- sum((wmin.vec >= wmax_g) * num.vec)

    # 3. Count times wmax.vec > individual wmin_g, weighted by counts
    highCount <- sum((wmax.vec > wmin_g) * num.vec)

    # 4. Build table
    data.frame(
      "wmin_g" = wmin_g,
      "wmax_g" = wmax_g,
      "countGTEwmin" = countGTEwmin,
      "lowCount" = lowCount,
      "highCount" = highCount)
  })

  # Return the purrr version
  return(out_data)
}
Code

# Control options
mle_results <- mle_results_1g
actual_vals <- actual_vals_1g
min_used <- 1

#### -- Plot setup below ----

# Power law parameters and summary details for the group of data:
b.MLE <- mle_results$b
total_abundance <- mle_results$n
b.confMin <- mle_results$confMin
b.confMax <- mle_results$confMax

# Create range of x values from the group to get power law predictions
# Min and max weights for power law
xmin <- min_used
xmax <- max(actual_vals$wmin_g)

# Create x values (individual bodymass) to predict across
# Break up the x limits into pieces between min and max
x.PLB <- seq(from = xmin, to = xmax, by = 0.1)
x.PLB.length <- length(x.PLB)

# Y values for plot limits/bounds/predictions from bounded power law pdf
y.PLB <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMin <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMax <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance

# Put it in a df to make it easier
PLB_df <- data.frame(
  x.PLB = x.PLB,
  y.PLB = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax)

# 2. Solve the power law function again in mutate for residuals
# Need to do it twice, for wmin_g and wmax_g
# Aggregate across overlapping size ranges
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate) / 2))

#### Plotting raw ####
# Messing with the plot axes
fit_1g_gom_2000 <- ggplot() +
  # geom_ribbon(
  #   data = PLB_df, aes(x.PLB, ymin = confMin, ymax = confMax),
  #   fill = gmri_cols("orange"),
  #   alpha = 0.4) +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("orange"), linewidth = 1) +
  geom_point(
    data = p_prepped %>% filter(lowCount > 0),
    aes(x = (wmin_g + wmax_g) / 2, y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.15,
    shape = 1) +
  scale_y_log10(labels = trans_format("log10", math_format(10^.x)), limits = c(10^-1, 10^6)) +
  scale_x_log10(labels = trans_format("log10", math_format(10^.x)), limits = c(10^-1, 10^6)) +
  labs(
    x = "Weight (g)",
    y = "Abundance",
    title = "Wigley Species - GOM - Spring - 2000",
    subtitle = "1g Minimum Size")

fit_1g_gom_2000
Code
# Control options
mle_results <- mle_results_16g
actual_vals <- actual_vals_16g
min_used <- 16

#### -- Plot setup below ----

# Power law parameters and summary details for the group of data:
b.MLE <- mle_results$b
total_abundance <- mle_results$n
b.confMin <- mle_results$confMin
b.confMax <- mle_results$confMax

# Create range of x values from the group to get power law predictions
# Min and max weights for power law
xmin <- min_used
xmax <- max(actual_vals$wmin_g)

# Create x values (individual bodymass) to predict across
# Break up the x limits into pieces between min and max
x.PLB <- seq(from = xmin, to = xmax, by = 0.1)
x.PLB.length <- length(x.PLB)

# Y values for plot limits/bounds/predictions from bounded power law pdf
y.PLB <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMin <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMax <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance

# Put it in a df to make it easier
PLB_df <- data.frame(
  x.PLB = x.PLB,
  y.PLB = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax)

# 2. Solve the power law function again in mutate for residuals
# Need to do it twice, for wmin_g and wmax_g
# Aggregate across overlapping size ranges
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate) / 2))

#### Plotting raw ####
fit_16g_gom_2000 <- ggplot() +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("orange"), linewidth = 1) +
  geom_point(
    data = p_prepped %>% filter(lowCount > 0),
    aes(x = (wmin_g + wmax_g) / 2, y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.15,
    shape = 1) +
  scale_y_log10(labels = trans_format("log10", math_format(10^.x)), limits = c(10^-1, 10^6)) +
  scale_x_log10(labels = trans_format("log10", math_format(10^.x)), limits = c(10^-1, 10^6)) +
  labs(
    x = "Weight (g)",
    y = "Abundance",
    title = "Wigley Species - GOM - Spring - 2000",
    subtitle = "16g Minimum Size")

fit_16g_gom_2000
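Since this same plotting block now appears several times with different inputs, a hedged refactor (function and argument names are mine) would cut the duplication; the max-size checks below could call it too:

Code

# Wrap the repeated ISD plot setup in one function (names are mine);
# min_used/max_used mirror the control options used in each block
plot_isd_fit <- function(mle_results, actual_vals, min_used,
                         max_used = max(actual_vals$wmin_g),
                         subtitle = "") {

  # Fitted bounded power-law curve over the chosen size range
  x.PLB <- seq(min_used, max_used, by = 0.1)
  y.PLB <- (1 - sizeSpectra::pPLB(
    x = x.PLB, b = mle_results$b,
    xmin = min(x.PLB), xmax = max(x.PLB))) * mle_results$n

  # Observed counts at/above each size bin
  p_prepped <- isd_plot_prep(actual_vals, min_weight_g = min_used)

  ggplot() +
    geom_line(
      data = data.frame(x.PLB, y.PLB), aes(x.PLB, y.PLB),
      color = gmri_cols("orange"), linewidth = 1) +
    geom_point(
      data = filter(p_prepped, lowCount > 0),
      aes((wmin_g + wmax_g) / 2, countGTEwmin),
      color = gmri_cols("gmri blue"), alpha = 0.15, shape = 1) +
    scale_x_log10(labels = trans_format("log10", math_format(10^.x))) +
    scale_y_log10(labels = trans_format("log10", math_format(10^.x))) +
    labs(x = "Weight (g)", y = "Abundance", subtitle = subtitle)
}

# e.g. plot_isd_fit(mle_results_16g, actual_vals_16g, 16,
#                   subtitle = "16g Minimum Size")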
Sensitivity to max size
Code
# Get the slope over those years
mle_results_1g <- group_binspecies_spectra(
  ss_input = actual_vals_1g,
  grouping_vars = c("est_year", "season", "survey_area"),
  abundance_vals = "numlen_adj",
  length_vals = "length_cm",
  use_weight = FALSE,
  isd_xmin = 1,
  global_min = TRUE,
  isd_xmax = 10^5,
  global_max = TRUE,
  bin_width = 1,
  vdiff = 2)

# And again for 16g
mle_results_16g <- group_binspecies_spectra(
  ss_input = actual_vals_16g,
  grouping_vars = c("est_year", "season", "survey_area"),
  abundance_vals = "numlen_adj",
  length_vals = "length_cm",
  use_weight = FALSE,
  isd_xmin = 16,
  global_min = TRUE,
  isd_xmax = 10^5,
  global_max = TRUE,
  bin_width = 1,
  vdiff = 2)
Code

# Control options
mle_results <- mle_results_1g
actual_vals <- actual_vals_1g
min_used <- 1
max_used <- 10^5

#### -- Plot setup below ----

# Power law parameters and summary details for the group of data:
b.MLE <- mle_results$b
total_abundance <- mle_results$n
b.confMin <- mle_results$confMin
b.confMax <- mle_results$confMax

# Create range of x values from the group to get power law predictions
# Min and max weights for power law (use the 10^5 cap set above)
xmin <- min_used
xmax <- max_used

# Create x values (individual bodymass) to predict across
# Break up the x limits into pieces between min and max
x.PLB <- seq(from = xmin, to = xmax, by = 0.1)
x.PLB.length <- length(x.PLB)

# Y values for plot limits/bounds/predictions from bounded power law pdf
y.PLB <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMin <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMax <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance

# Put it in a df to make it easier
PLB_df <- data.frame(
  x.PLB = x.PLB,
  y.PLB = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax)

# 2. Solve the power law function again in mutate for residuals
# Need to do it twice, for wmin_g and wmax_g
# Aggregate across overlapping size ranges
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate) / 2))

#### Plotting raw ####
fit_1g_gom_2000 <- ggplot() +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("orange"), linewidth = 1) +
  geom_point(
    data = p_prepped %>% filter(lowCount > 0),
    aes(x = (wmin_g + wmax_g) / 2, y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.15,
    shape = 1) +
  scale_y_log10(labels = trans_format("log10", math_format(10^.x)), limits = c(10^-1, 10^6)) +
  scale_x_log10(labels = trans_format("log10", math_format(10^.x)), limits = c(10^-1, 10^6)) +
  labs(
    x = "Weight (g)",
    y = "Abundance",
    title = "Wigley Species - GOM - Spring - 2000",
    subtitle = "1g Minimum Size, 10^5 max size limit")

fit_1g_gom_2000
Code
# Control options
mle_results <- mle_results_16g
actual_vals <- actual_vals_16g
min_used <- 16
max_used <- 10^5

#### -- Plot setup below ----

# Power law parameters and summary details for the group of data:
b.MLE <- mle_results$b
total_abundance <- mle_results$n
b.confMin <- mle_results$confMin
b.confMax <- mle_results$confMax

# Create range of x values from the group to get power law predictions
# Min and max weights for power law
xmin <- min_used
xmax <- max_used

# Create x values (individual bodymass) to predict across
# Break up the x limits into pieces between min and max
x.PLB <- seq(from = xmin, to = xmax, by = 0.1)
x.PLB.length <- length(x.PLB)

# Y values for plot limits/bounds/predictions from bounded power law pdf
y.PLB <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMin <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMax <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance

# Put it in a df to make it easier
PLB_df <- data.frame(
  x.PLB = x.PLB,
  y.PLB = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax)

# 2. Solve the power law function again in mutate for residuals
# Need to do it twice, for wmin_g and wmax_g
# Aggregate across overlapping size ranges
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate) / 2))

#### Plotting raw ####
fit_16g_gom_2000 <- ggplot() +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("orange"), linewidth = 1) +
  geom_point(
    data = p_prepped %>% filter(lowCount > 0),
    aes(x = (wmin_g + wmax_g) / 2, y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.15,
    shape = 1) +
  scale_y_log10(labels = trans_format("log10", math_format(10^.x)), limits = c(10^-1, 10^6)) +
  scale_x_log10(labels = trans_format("log10", math_format(10^.x)), limits = c(10^-1, 10^6)) +
  labs(
    x = "Weight (g)",
    y = "Abundance",
    title = "Wigley Species - GOM - Spring - 2000",
    subtitle = "16g Minimum Size, 10^5 max size limit")

fit_16g_gom_2000